import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
import datetime
# Use seaborn style defaults and set default figure size
sns.set_style("whitegrid")
dfmod = pd.read_csv('data/aqi_data_mod.csv')
dfobs = pd.read_csv('data/aqi_data_obs.csv')
dfmod.isna().sum()
date 0 hour 0 mod_PM2.5 0 mod_PM2.5_stdev 0 mod_PM10 0 mod_PM10_stdev 0 dtype: int64
dfobs.isna().sum()
date 0 hour 0 obs_PM2.5 33 obs_PM2.5_stdev 52 obs_PM10 33 obs_PM10_stdev 52 dtype: int64
dfmod.head()
| date | hour | mod_PM2.5 | mod_PM2.5_stdev | mod_PM10 | mod_PM10_stdev | |
|---|---|---|---|---|---|---|
| 0 | 2020-11-1 | 0:00:00 | 152.61 | 51.64 | 241.71 | 105.98 |
| 1 | 2020-11-1 | 1:00:00 | 159.82 | 53.31 | 254.49 | 111.38 |
| 2 | 2020-11-1 | 2:00:00 | 160.04 | 51.57 | 252.47 | 109.05 |
| 3 | 2020-11-1 | 3:00:00 | 156.73 | 40.47 | 242.03 | 87.21 |
| 4 | 2020-11-1 | 4:00:00 | 156.32 | 41.95 | 232.03 | 84.02 |
dfobs.head()
| date | hour | obs_PM2.5 | obs_PM2.5_stdev | obs_PM10 | obs_PM10_stdev | |
|---|---|---|---|---|---|---|
| 0 | 2020-11-01 | 0:00:00 | 387.90 | 158.73 | 561.44 | 165.77 |
| 1 | 2020-11-01 | 1:00:00 | 348.11 | 143.39 | 486.04 | 159.66 |
| 2 | 2020-11-01 | 2:00:00 | 326.03 | 120.39 | 516.67 | 134.42 |
| 3 | 2020-11-01 | 3:00:00 | 302.90 | 111.29 | 514.41 | 125.31 |
| 4 | 2020-11-01 | 4:00:00 | 309.05 | 104.38 | 520.55 | 102.16 |
dfmod['datetime'] = pd.to_datetime(dfmod.date) + pd.to_timedelta(dfmod.hour)
dfobs['datetime'] = pd.to_datetime(dfobs.date) + pd.to_timedelta(dfobs.hour)
dfmod = dfmod.drop(['date', 'hour'], axis=1)
dfobs = dfobs.drop(['date', 'hour'], axis=1)
dfmod = dfmod.set_index(dfmod.datetime)
dfobs = dfobs.set_index(dfobs.datetime)
dfobs.head()
| obs_PM2.5 | obs_PM2.5_stdev | obs_PM10 | obs_PM10_stdev | datetime | |
|---|---|---|---|---|---|
| datetime | |||||
| 2020-11-01 00:00:00 | 387.90 | 158.73 | 561.44 | 165.77 | 2020-11-01 00:00:00 |
| 2020-11-01 01:00:00 | 348.11 | 143.39 | 486.04 | 159.66 | 2020-11-01 01:00:00 |
| 2020-11-01 02:00:00 | 326.03 | 120.39 | 516.67 | 134.42 | 2020-11-01 02:00:00 |
| 2020-11-01 03:00:00 | 302.90 | 111.29 | 514.41 | 125.31 | 2020-11-01 03:00:00 |
| 2020-11-01 04:00:00 | 309.05 | 104.38 | 520.55 | 102.16 | 2020-11-01 04:00:00 |
# Do interpolatoin of the observed data
dfobs = dfobs.interpolate(method = 'time')
dfobs.isna().sum()
obs_PM2.5 0 obs_PM2.5_stdev 0 obs_PM10 0 obs_PM10_stdev 0 datetime 0 dtype: int64
# Plot of PM2.5 model and data for Nov, Dec and Jan
fig = plt.figure(figsize=(20,15))
plt.subplot(311)
sns.lineplot(data = dfmod.loc["2020-11-01":"2020-11-30"], x='datetime', y='mod_PM2.5', err_style='band', label='model', color='red')
# plt.xticks(rotation= 10)
sns.lineplot(data = dfobs.loc["2020-11-01":"2020-11-30"], x='datetime', y='obs_PM2.5', err_style='band', label='observed', color = 'blue')
#plt.xticks(rotation= 10)
plt.ylabel('PM2.5 concentration (in $\mu g/m^3$)', fontsize=14)
plt.xlabel('Time', fontsize=14)
plt.legend(loc=1, prop={'size': 14})
plt.title('November 2020', fontsize=18)
plt.subplot(312)
sns.lineplot(data = dfmod.loc["2020-12-01":"2020-12-31"], x='datetime', y='mod_PM2.5', err_style='band', label='model', color='red')
# plt.xticks(rotation= 10)
sns.lineplot(data = dfobs.loc["2020-12-01":"2020-12-31"], x='datetime', y='obs_PM2.5', err_style='band', label='observed', color = 'blue')
#plt.xticks(rotation= 10)
plt.ylabel('PM2.5 concentration (in $\mu g/m^3$)', fontsize=14)
plt.xlabel('Time', fontsize=14)
plt.legend(loc=1, prop={'size': 14})
plt.title('December 2020', fontsize=18)
plt.subplot(313)
sns.lineplot(data = dfmod.loc["2021-01-01":"2021-01-31"], x='datetime', y='mod_PM2.5', err_style='band', label='model', color='red')
# plt.xticks(rotation= 10)
sns.lineplot(data = dfobs.loc["2021-01-01":"2021-01-31"], x='datetime', y='obs_PM2.5', err_style='band', label='observed', color = 'blue')
#plt.xticks(rotation= 10)
plt.ylabel('PM2.5 concentration (in $\mu g/m^3$)', fontsize=14)
plt.xlabel('Time', fontsize=14)
plt.legend(loc=1, prop={'size': 14})
plt.title('January 2021', fontsize=18)
fig.suptitle('PM2.5 concentration over Delhi', fontsize=24, y=0.99)
fig.tight_layout()
plt.savefig('./images/pm25ts.png')
# Plot of PM10 model and data for Nov, Dec and Jan
fig = plt.figure(figsize=(20,15))
plt.subplot(311)
sns.lineplot(data = dfmod.loc["2020-11-01":"2020-11-30"], x='datetime', y='mod_PM10', err_style='band', label='model', color='red')
# plt.xticks(rotation= 10)
sns.lineplot(data = dfobs.loc["2020-11-01":"2020-11-30"], x='datetime', y='obs_PM10', err_style='band', label='observed', color = 'blue')
#plt.xticks(rotation= 10)
plt.ylabel('PM10 concentration (in $\mu g/m^3$)', fontsize=14)
plt.xlabel('Time', fontsize=14)
plt.legend(loc=1, prop={'size': 14})
plt.title('November 2020', fontsize=18)
plt.subplot(312)
sns.lineplot(data = dfmod.loc["2020-12-01":"2020-12-31"], x='datetime', y='mod_PM10', err_style='band', label='model', color='red')
# plt.xticks(rotation= 10)
sns.lineplot(data = dfobs.loc["2020-12-01":"2020-12-31"], x='datetime', y='obs_PM10', err_style='band', label='observed', color = 'blue')
#plt.xticks(rotation= 10)
plt.ylabel('PM10 concentration (in $\mu g/m^3$)', fontsize=14)
plt.xlabel('Time', fontsize=14)
plt.legend(loc=1, prop={'size': 14})
plt.title('December 2020', fontsize=18)
plt.subplot(313)
sns.lineplot(data = dfmod.loc["2021-01-01":"2021-01-31"], x='datetime', y='mod_PM10', err_style='band', label='model', color='red')
# plt.xticks(rotation= 10)
sns.lineplot(data = dfobs.loc["2021-01-01":"2021-01-31"], x='datetime', y='obs_PM10', err_style='band', label='observed', color = 'blue')
#plt.xticks(rotation= 10)
plt.ylabel('PM10 concentration (in $\mu g/m^3$)', fontsize=14)
plt.xlabel('Time', fontsize=14)
plt.legend(loc=1, prop={'size': 14})
plt.title('January 2021', fontsize=18)
fig.suptitle('PM10 concentration over Delhi', fontsize=24, y=0.99)
fig.tight_layout()
plt.savefig('./images/pm10ts.png')
# Extracting the different values for the months
dfmod['month'] = dfmod.index.month
dfobs['month'] = dfobs.index.month
dfmod
| mod_PM2.5 | mod_PM2.5_stdev | mod_PM10 | mod_PM10_stdev | datetime | month | |
|---|---|---|---|---|---|---|
| datetime | ||||||
| 2020-11-01 00:00:00 | 152.61 | 51.64 | 241.71 | 105.98 | 2020-11-01 00:00:00 | 11 |
| 2020-11-01 01:00:00 | 159.82 | 53.31 | 254.49 | 111.38 | 2020-11-01 01:00:00 | 11 |
| 2020-11-01 02:00:00 | 160.04 | 51.57 | 252.47 | 109.05 | 2020-11-01 02:00:00 | 11 |
| 2020-11-01 03:00:00 | 156.73 | 40.47 | 242.03 | 87.21 | 2020-11-01 03:00:00 | 11 |
| 2020-11-01 04:00:00 | 156.32 | 41.95 | 232.03 | 84.02 | 2020-11-01 04:00:00 | 11 |
| ... | ... | ... | ... | ... | ... | ... |
| 2021-01-31 19:00:00 | 262.13 | 176.70 | 477.64 | 333.23 | 2021-01-31 19:00:00 | 1 |
| 2021-01-31 20:00:00 | 280.50 | 190.57 | 515.65 | 363.73 | 2021-01-31 20:00:00 | 1 |
| 2021-01-31 21:00:00 | 280.55 | 165.43 | 531.10 | 333.06 | 2021-01-31 21:00:00 | 1 |
| 2021-01-31 22:00:00 | 317.31 | 179.83 | 602.42 | 358.34 | 2021-01-31 22:00:00 | 1 |
| 2021-01-31 23:00:00 | 346.14 | 208.83 | 654.82 | 396.42 | 2021-01-31 23:00:00 | 1 |
2208 rows × 6 columns
splits1 = dfmod.groupby('month')
splits2 = dfobs.groupby('month')
dfmod_jan = list(splits1)[0][1]
dfmod_nov = list(splits1)[1][1]
dfmod_dec = list(splits1)[2][1]
dfobs_jan = list(splits2)[0][1]
dfobs_nov = list(splits2)[1][1]
dfobs_dec = list(splits2)[2][1]
dfobs_jan
| obs_PM2.5 | obs_PM2.5_stdev | obs_PM10 | obs_PM10_stdev | datetime | month | |
|---|---|---|---|---|---|---|
| datetime | ||||||
| 2021-01-01 00:00:00 | 254.10 | 175.23 | 340.68 | 224.71 | 2021-01-01 00:00:00 | 1 |
| 2021-01-01 01:00:00 | 239.93 | 180.57 | 322.94 | 238.92 | 2021-01-01 01:00:00 | 1 |
| 2021-01-01 02:00:00 | 302.58 | 158.82 | 434.00 | 212.80 | 2021-01-01 02:00:00 | 1 |
| 2021-01-01 03:00:00 | 357.43 | 158.88 | 534.30 | 215.98 | 2021-01-01 03:00:00 | 1 |
| 2021-01-01 04:00:00 | 377.91 | 200.52 | 557.63 | 252.32 | 2021-01-01 04:00:00 | 1 |
| ... | ... | ... | ... | ... | ... | ... |
| 2021-01-31 19:00:00 | 283.41 | 132.55 | 473.15 | 212.18 | 2021-01-31 19:00:00 | 1 |
| 2021-01-31 20:00:00 | 258.25 | 111.13 | 404.22 | 162.62 | 2021-01-31 20:00:00 | 1 |
| 2021-01-31 21:00:00 | 259.50 | 102.70 | 420.72 | 145.34 | 2021-01-31 21:00:00 | 1 |
| 2021-01-31 22:00:00 | 279.51 | 105.07 | 433.31 | 156.45 | 2021-01-31 22:00:00 | 1 |
| 2021-01-31 23:00:00 | 264.59 | 84.86 | 419.70 | 138.79 | 2021-01-31 23:00:00 | 1 |
744 rows × 6 columns
# Plot of PM2.5 model and data for Nov, Dec and Jan with errorbars
fig = plt.figure(figsize=(20,15))
plt.subplot(311)
sns.lineplot(data = dfmod_nov, x='datetime', y='mod_PM2.5', err_style='band', label='model', color='red')
# plt.errorbar(dfmod_nov.index, dfmod_nov['mod_PM2.5'], yerr=dfmod_nov['mod_PM2.5_stdev'], alpha=0.15, ecolor='blue', fmt='o', mfc='blue', markersize=8, capsize=10)
plt.fill_between(dfmod_nov.index, dfmod_nov['mod_PM2.5'] + dfmod_nov['mod_PM2.5_stdev'], dfmod_nov['mod_PM2.5'] - dfmod_nov['mod_PM2.5_stdev'], alpha=0.15, color='red')
sns.lineplot(data = dfobs_nov, x='datetime', y='obs_PM2.5', err_style='band', label='observed', color='blue')
# plt.errorbar(dfobs_nov.index, dfobs_nov['obs_PM2.5'], yerr=dfobs_nov['obs_PM2.5_stdev'], alpha=0.15, ecolor='green', fmt='o', mfc='green', markersize=8, capsize=10)
plt.fill_between(dfobs_nov.index, dfobs_nov['obs_PM2.5'] + dfobs_nov['obs_PM2.5_stdev'], dfobs_nov['obs_PM2.5'] - dfobs_nov['obs_PM2.5_stdev'], alpha=0.15, color='blue')
plt.ylabel('PM2.5 concentration (in $\mu g/m^3$)', fontsize=14)
plt.xlabel('Time', fontsize=14)
plt.legend(loc=1, prop={'size': 15})
plt.title('November 2020', fontsize=18)
plt.subplot(312)
sns.lineplot(data = dfmod_dec, x='datetime', y='mod_PM2.5', err_style='band', label='model', color='red')
# plt.errorbar(dfmod_dec.index, dfmod_dec['mod_PM2.5'], yerr=dfmod_dec['mod_PM2.5_stdev'], alpha=0.15, ecolor='blue', fmt='o', mfc='blue', markersize=8, capsize=10)
plt.fill_between(dfmod_dec.index, dfmod_dec['mod_PM2.5'] + dfmod_dec['mod_PM2.5_stdev'], dfmod_dec['mod_PM2.5'] - dfmod_dec['mod_PM2.5_stdev'], alpha=0.15, color='red')
sns.lineplot(data = dfobs_dec, x='datetime', y='obs_PM2.5', err_style='band', label='observed', color='blue')
# plt.errorbar(dfobs_dec.index, dfobs_dec['obs_PM2.5'], yerr=dfobs_dec['obs_PM2.5_stdev'], alpha=0.15, ecolor='green', fmt='o', mfc='green', markersize=8, capsize=10)
plt.fill_between(dfobs_dec.index, dfobs_dec['obs_PM2.5'] + dfobs_dec['obs_PM2.5_stdev'], dfobs_dec['obs_PM2.5'] - dfobs_dec['obs_PM2.5_stdev'], alpha=0.15, color='blue')
plt.ylabel('PM2.5 concentration (in $\mu g/m^3$)', fontsize=14)
plt.xlabel('Time', fontsize=14)
plt.legend(loc=1, prop={'size': 15})
plt.title('December 2020', fontsize=18)
plt.subplot(313)
sns.lineplot(data = dfmod_jan, x='datetime', y='mod_PM2.5', err_style='band', label='model', color='red')
# plt.errorbar(dfmod_jan.index, dfmod_jan['mod_PM2.5'], yerr=dfmod_jan['mod_PM2.5_stdev'], alpha=0.15, ecolor='blue', fmt='o', mfc='blue', markersize=8, capsize=10)
plt.fill_between(dfmod_jan.index, dfmod_jan['mod_PM2.5'] + dfmod_jan['mod_PM2.5_stdev'], dfmod_jan['mod_PM2.5'] - dfmod_jan['mod_PM2.5_stdev'], alpha=0.15, color='red')
sns.lineplot(data = dfobs_jan, x='datetime', y='obs_PM2.5', err_style='band', label='observed', color='blue')
# plt.errorbar(dfobs_jan.index, dfobs_jan['obs_PM2.5'], yerr=dfobs_jan['obs_PM2.5_stdev'], alpha=0.15, ecolor='green', fmt='o', mfc='green', markersize=8, capsize=10)
plt.fill_between(dfobs_jan.index, dfobs_jan['obs_PM2.5'] + dfobs_jan['obs_PM2.5_stdev'], dfobs_jan['obs_PM2.5'] - dfobs_jan['obs_PM2.5_stdev'], alpha=0.15, color='blue')
plt.ylabel('PM2.5 concentration (in $\mu g/m^3$)', fontsize=14)
plt.xlabel('Time', fontsize=14)
plt.legend(loc=1, prop={'size': 15})
plt.title('January 2021', fontsize=18)
fig.suptitle('PM2.5 concentration over Delhi', fontsize=24, y=0.99)
fig.tight_layout()
plt.savefig('./images/pm25ts_err.png')
# Plot of PM10 model and data for Nov, Dec and Jan with errorbars
fig = plt.figure(figsize=(20,15))
plt.subplot(311)
sns.lineplot(data = dfmod_nov, x='datetime', y='mod_PM10', err_style='band', label='model', color='red')
# plt.errorbar(dfmod_nov.index, dfmod_nov['mod_PM10'], yerr=dfmod_nov['mod_PM10_stdev '], alpha=0.15, ecolor='blue', fmt='o', mfc='blue', markersize=8, capsize=10)
plt.fill_between(dfmod_nov.index, dfmod_nov['mod_PM10'] + dfmod_nov['mod_PM10_stdev '], dfmod_nov['mod_PM10'] - dfmod_nov['mod_PM10_stdev '], alpha=0.15, color='red')
sns.lineplot(data = dfobs_nov, x='datetime', y='obs_PM10', err_style='band', label='observed', color='blue')
# plt.errorbar(dfobs_nov.index, dfobs_nov['obs_PM10'], yerr=dfobs_nov['obs_PM10_stdev '], alpha=0.15, ecolor='green', fmt='o', mfc='green', markersize=8, capsize=10)
plt.fill_between(dfobs_nov.index, dfobs_nov['obs_PM10'] + dfobs_nov['obs_PM10_stdev '], dfobs_nov['obs_PM10'] - dfobs_nov['obs_PM10_stdev '], alpha=0.15, color='blue')
plt.ylabel('PM10 concentration (in $\mu g/m^3$)', fontsize=14)
plt.xlabel('Time', fontsize=14)
plt.legend(loc=1, prop={'size': 15})
plt.title('November 2020', fontsize=18)
plt.subplot(312)
sns.lineplot(data = dfmod_dec, x='datetime', y='mod_PM10', err_style='band', label='model', color='red')
# plt.errorbar(dfmod_nov.index, dfmod_nov['mod_PM10'], yerr=dfmod_nov['mod_PM10_stdev '], alpha=0.15, ecolor='blue', fmt='o', mfc='blue', markersize=8, capsize=10)
plt.fill_between(dfmod_dec.index, dfmod_dec['mod_PM10'] + dfmod_dec['mod_PM10_stdev '], dfmod_dec['mod_PM10'] - dfmod_dec['mod_PM10_stdev '], alpha=0.15, color='red')
sns.lineplot(data = dfobs_dec, x='datetime', y='obs_PM10', err_style='band', label='observed', color='blue')
# plt.errorbar(dfobs_nov.index, dfobs_nov['obs_PM10'], yerr=dfobs_nov['obs_PM10_stdev '], alpha=0.15, ecolor='green', fmt='o', mfc='green', markersize=8, capsize=10)
plt.fill_between(dfobs_dec.index, dfobs_dec['obs_PM10'] + dfobs_dec['obs_PM10_stdev '], dfobs_dec['obs_PM10'] - dfobs_dec['obs_PM10_stdev '], alpha=0.15, color='blue')
plt.ylabel('PM10 concentration (in $\mu g/m^3$)', fontsize=14)
plt.xlabel('Time', fontsize=14)
plt.legend(loc=1, prop={'size': 15})
plt.title('December 2020', fontsize=18)
plt.subplot(313)
sns.lineplot(data = dfmod_jan, x='datetime', y='mod_PM10', err_style='band', label='model', color='red')
# plt.errorbar(dfmod_nov.index, dfmod_nov['mod_PM10'], yerr=dfmod_nov['mod_PM10_stdev '], alpha=0.15, ecolor='blue', fmt='o', mfc='blue', markersize=8, capsize=10)
plt.fill_between(dfmod_jan.index, dfmod_jan['mod_PM10'] + dfmod_jan['mod_PM10_stdev '], dfmod_jan['mod_PM10'] - dfmod_jan['mod_PM10_stdev '], alpha=0.15, color='red')
sns.lineplot(data = dfobs_jan, x='datetime', y='obs_PM10', err_style='band', label='observed', color='blue')
# plt.errorbar(dfobs_nov.index, dfobs_nov['obs_PM10'], yerr=dfobs_nov['obs_PM10_stdev '], alpha=0.15, ecolor='green', fmt='o', mfc='green', markersize=8, capsize=10)
plt.fill_between(dfobs_jan.index, dfobs_jan['obs_PM10'] + dfobs_jan['obs_PM10_stdev '], dfobs_jan['obs_PM10'] - dfobs_jan['obs_PM10_stdev '], alpha=0.15, color='blue')
plt.ylabel('PM10 concentration (in $\mu g/m^3$)', fontsize=14)
plt.xlabel('Time', fontsize=14)
plt.legend(loc=1, prop={'size': 15})
plt.title('January 2021', fontsize=18)
fig.suptitle('PM10 concentration over Delhi', fontsize=24, y=0.99)
fig.tight_layout()
plt.savefig('./images/pm10ts_err.png')
# Function to plot diurnal variaiton for given dataframe as input
def plot_diurnal_variation(dfmod, dfobs, month):
# Taking the mean of all data for diurnal variation
dfmod['time'] = dfmod.index.time
dfobs['time'] = dfobs.index.time
splits3 = dfmod.groupby('time')
splits4 = dfobs.groupby('time')
mod_diurnal_pm25 = []
mod_diurnal_pm25_stdev = []
mod_diurnal_pm10 = []
mod_diurnal_pm10_stdev = []
obs_diurnal_pm25 = []
obs_diurnal_pm25_stdev = []
obs_diurnal_pm10 = []
obs_diurnal_pm10_stdev = []
for i in range(0,24):
temp1 = list(splits3)[i][1].describe().mean()
mod_diurnal_pm25.append(temp1[0])
mod_diurnal_pm25_stdev.append(temp1[1])
mod_diurnal_pm10.append(temp1[2])
mod_diurnal_pm10_stdev.append(temp1[3])
temp2 = list(splits4)[i][1].describe().mean()
obs_diurnal_pm25.append(temp2[0])
obs_diurnal_pm25_stdev.append(temp2[1])
obs_diurnal_pm10.append(temp2[2])
obs_diurnal_pm10_stdev.append(temp2[3])
times = [x for x in range(0,24)]
# Plot mean diurnal variation PM2.5
fig = plt.figure(figsize=(12,5))
plt.plot(times, mod_diurnal_pm25, color='red', label = 'model')
(_, caps, _) = plt.errorbar(times, mod_diurnal_pm25, yerr = mod_diurnal_pm25_stdev, \
alpha=0.3, ecolor='red', fmt='o', mfc='red', markersize=8, capsize=10)
# plt.fill_between(mod_diurnal_pm25, mod_diurnal_pm25 + mod_diurnal_pm25_stdev, mod_diurnal_pm25 - mod_diurnal_pm25_stdev, alpha=0.1, color='blue')
for cap in caps:
cap.set_markeredgewidth(1)
plt.plot(times, obs_diurnal_pm25, color='blue', label = 'observed')
(_, caps, _) = plt.errorbar(times, obs_diurnal_pm25, yerr = obs_diurnal_pm25_stdev, \
alpha=0.3, ecolor='blue', fmt='o', mfc='blue', markersize=8, capsize=10)
# plt.fill_between(obs_diurnal_pm25, obs_diurnal_pm25 + obs_diurnal_pm25_stdev, obs_diurnal_pm25 - obs_diurnal_pm25_stdev, alpha=0.1, color='green')
for cap in caps:
cap.set_markeredgewidth(1)
plt.ylabel('PM2.5 concentration (in $\mu g/m^3$)', fontsize=14)
plt.xlabel('Time', fontsize=14)
plt.legend(loc=1, prop={'size': 15})
plt.xticks(np.arange(0, 24, step=1))
if month == "combined":
plt.title('PM2.5 diurnal variation mean Overall winter', fontsize=18)
plt.savefig('./images/diurnal_pm25.png')
elif month == "november":
plt.title('PM2.5 diurnal variation mean for November 2020', fontsize=18)
plt.savefig('./images/diurnal_pm25_nov.png')
elif month == "december":
plt.title('PM2.5 diurnal variation mean for December 2020', fontsize=18)
plt.savefig('./images/diurnal_pm25_dec.png')
elif month == "january":
plt.title('PM2.5 diurnal variation mean for January 2021', fontsize=18)
plt.savefig('./images/diurnal_pm25_jan.png')
else:
print("Wrong month for pm2.5")
# Plot mean diurnal variation PM10
fig = plt.figure(figsize=(12,5))
plt.plot(times, mod_diurnal_pm10, color='red', label = 'model')
(_, caps, _) = plt.errorbar(times, mod_diurnal_pm10, yerr = mod_diurnal_pm10_stdev, \
alpha=0.3, ecolor='red', fmt='o', mfc='red', markersize=8, capsize=10)
# plt.fill_between(mod_diurnal_pm10, mod_diurnal_pm10 + mod_diurnal_pm10_stdev, mod_diurnal_pm10 - mod_diurnal_pm10_stdev, alpha=0.1, color='blue')
for cap in caps:
cap.set_markeredgewidth(1)
plt.plot(times, obs_diurnal_pm10, color='blue', label = 'observed')
(_, caps, _) = plt.errorbar(times, obs_diurnal_pm10, yerr = obs_diurnal_pm10_stdev, \
alpha=0.3, ecolor='blue', fmt='o', mfc='blue', markersize=8, capsize=10)
# plt.fill_between(obs_diurnal_pm10, obs_diurnal_pm10 + obs_diurnal_pm10_stdev, obs_diurnal_pm10 - obs_diurnal_pm10_stdev, alpha=0.1, color='green')
for cap in caps:
cap.set_markeredgewidth(1)
plt.ylabel('PM10 concentration (in $\mu g/m^3$)', fontsize=14)
plt.xlabel('Time (in hrs)', fontsize=14)
plt.legend(loc=1, prop={'size': 15})
plt.xticks(np.arange(0, 24, step=1))
if month == "combined":
plt.title('PM10 diurnal variation mean Overall winter', fontsize=18)
plt.savefig('./images/diurnal_pm10.png')
elif month == "november":
plt.title('PM10 diurnal variation mean for November 2020', fontsize=18)
plt.savefig('./images/diurnal_pm10_nov.png')
elif month == "december":
plt.title('PM10 diurnal variation mean for December 2020', fontsize=18)
plt.savefig('./images/diurnal_pm10_dec.png')
elif month == "january":
plt.title('PM10 diurnal variation mean for January 2021', fontsize=18)
plt.savefig('./images/diurnal_pm10_jan.png')
else:
print("Wrong month for pm10")
plot_diurnal_variation(dfmod, dfobs, "combined")
plot_diurnal_variation(dfmod_nov, dfobs_nov, "november")
plot_diurnal_variation(dfmod_dec, dfobs_dec, "december")
plot_diurnal_variation(dfmod_jan, dfobs_jan, "january")
dfmod['mod_pm2'] = dfmod['mod_PM2.5'].rolling(24).mean()
dfmod['mod_pm2_stdev'] = dfmod['mod_PM2.5_stdev'].rolling(24).mean()
dfmod['mod_pm10'] = dfmod['mod_PM10'].rolling(24).mean()
dfmod['mod_pm10_stdev'] = dfmod['mod_PM10_stdev '].rolling(24).mean()
dfobs['obs_pm2'] = dfobs['obs_PM2.5'].rolling(24).mean()
dfobs['obs_pm2_stdev'] = dfobs['obs_PM2.5_stdev'].rolling(24).mean()
dfobs['obs_pm10'] = dfobs['obs_PM10'].rolling(24).mean()
dfobs['obs_pm10_stdev'] = dfobs['obs_PM10_stdev '].rolling(24).mean()
dfmod = dfmod.drop(['mod_PM2.5', 'mod_PM2.5_stdev', 'mod_PM10', 'mod_PM10_stdev ', \
'datetime', 'month', 'time'], axis = 1)
dfobs = dfobs.drop(['obs_PM2.5', 'obs_PM2.5_stdev', 'obs_PM10', 'obs_PM10_stdev ', \
'datetime', 'month', 'time'], axis = 1)
dfobs.head(30)
| obs_pm2 | obs_pm2_stdev | obs_pm10 | obs_pm10_stdev | |
|---|---|---|---|---|
| datetime | ||||
| 2020-11-01 00:00:00 | NaN | NaN | NaN | NaN |
| 2020-11-01 01:00:00 | NaN | NaN | NaN | NaN |
| 2020-11-01 02:00:00 | NaN | NaN | NaN | NaN |
| 2020-11-01 03:00:00 | NaN | NaN | NaN | NaN |
| 2020-11-01 04:00:00 | NaN | NaN | NaN | NaN |
| 2020-11-01 05:00:00 | NaN | NaN | NaN | NaN |
| 2020-11-01 06:00:00 | NaN | NaN | NaN | NaN |
| 2020-11-01 07:00:00 | NaN | NaN | NaN | NaN |
| 2020-11-01 08:00:00 | NaN | NaN | NaN | NaN |
| 2020-11-01 09:00:00 | NaN | NaN | NaN | NaN |
| 2020-11-01 10:00:00 | NaN | NaN | NaN | NaN |
| 2020-11-01 11:00:00 | NaN | NaN | NaN | NaN |
| 2020-11-01 12:00:00 | NaN | NaN | NaN | NaN |
| 2020-11-01 13:00:00 | NaN | NaN | NaN | NaN |
| 2020-11-01 14:00:00 | NaN | NaN | NaN | NaN |
| 2020-11-01 15:00:00 | NaN | NaN | NaN | NaN |
| 2020-11-01 16:00:00 | NaN | NaN | NaN | NaN |
| 2020-11-01 17:00:00 | NaN | NaN | NaN | NaN |
| 2020-11-01 18:00:00 | NaN | NaN | NaN | NaN |
| 2020-11-01 19:00:00 | NaN | NaN | NaN | NaN |
| 2020-11-01 20:00:00 | NaN | NaN | NaN | NaN |
| 2020-11-01 21:00:00 | NaN | NaN | NaN | NaN |
| 2020-11-01 22:00:00 | NaN | NaN | NaN | NaN |
| 2020-11-01 23:00:00 | 179.448750 | 66.654167 | 335.294375 | 99.067708 |
| 2020-11-02 00:00:00 | 169.091667 | 63.607917 | 322.072708 | 97.627292 |
| 2020-11-02 01:00:00 | 159.435000 | 60.396667 | 311.264375 | 95.753542 |
| 2020-11-02 02:00:00 | 150.589583 | 58.083333 | 300.331042 | 95.091875 |
| 2020-11-02 03:00:00 | 143.366667 | 56.084167 | 291.565625 | 95.260208 |
| 2020-11-02 04:00:00 | 136.485000 | 54.558750 | 283.291458 | 96.382708 |
| 2020-11-02 05:00:00 | 133.243750 | 54.182500 | 278.833958 | 97.868542 |
# dfmod_daily = dfmod.groupby(dfmod.index.floor('D')).mean().drop('month', axis=1)
# dfobs_daily = dfobs.groupby(dfobs.index.floor('D')).mean().drop('month', axis=1)
# dfmod_daily
# Defining functions for finding CPCB AQI using pm2.5 and pm10 running mean data
def mod_aqi_pm2(c):
if c['mod_pm2'] > 0 and c['mod_pm2'] <= 30:
return (1.66*(c['mod_pm2'] - 0)) + 0
elif c['mod_pm2'] > 30 and c['mod_pm2'] <= 60:
return (1.66*(c['mod_pm2'] - 30)) + 50
elif c['mod_pm2'] > 60 and c['mod_pm2'] <= 90:
return (3.33*(c['mod_pm2'] - 60)) + 100
elif c['mod_pm2'] > 90 and c['mod_pm2'] <= 120:
return (3.33*(c['mod_pm2'] - 90)) + 200
elif c['mod_pm2'] > 120 and c['mod_pm2'] <= 250:
return (0.769*(c['mod_pm2'] - 120)) + 300
elif c['mod_pm2'] > 250:
return (0.769*(c['mod_pm2'] - 250)) + 400
else:
return np.nan
def mod_aqi_pm10(c):
if c['mod_pm10'] > 0 and c['mod_pm10'] <= 50:
return ((c['mod_pm10'] - 0)) + 0
elif c['mod_pm10'] > 50 and c['mod_pm10'] <= 100:
return ((c['mod_pm10'] - 50)) + 50
elif c['mod_pm10'] > 100 and c['mod_pm10'] <= 250:
return (0.66*(c['mod_pm10'] - 100)) + 100
elif c['mod_pm10'] > 250 and c['mod_pm10'] <= 350:
return ((c['mod_pm10'] - 250)) + 200
elif c['mod_pm10'] > 350 and c['mod_pm10'] <= 430:
return (1.25*(c['mod_pm10'] - 350)) + 300
elif c['mod_pm10'] > 430:
return (1.25*(c['mod_pm10'] - 430)) + 400
else:
return np.nan
def obs_aqi_pm2(c):
if c['obs_pm2'] > 0 and c['obs_pm2'] <= 30:
return (1.66*(c['obs_pm2'] - 0)) + 0
elif c['obs_pm2'] > 30 and c['obs_pm2'] <= 60:
return (1.66*(c['obs_pm2'] - 30)) + 50
elif c['obs_pm2'] > 60 and c['obs_pm2'] <= 90:
return (3.33*(c['obs_pm2'] - 60)) + 100
elif c['obs_pm2'] > 90 and c['obs_pm2'] <= 120:
return (3.33*(c['obs_pm2'] - 90)) + 200
elif c['obs_pm2'] > 120 and c['obs_pm2'] <= 250:
return (0.769*(c['obs_pm2'] - 120)) + 300
elif c['obs_pm2'] > 250:
return (0.769*(c['obs_pm2'] - 250)) + 400
else:
return np.nan
def obs_aqi_pm10(c):
if c['obs_pm10'] > 0 and c['obs_pm10'] <= 50:
return ((c['obs_pm10'] - 0)) + 0
elif c['obs_pm10'] > 50 and c['obs_pm10'] <= 100:
return ((c['obs_pm10'] - 50)) + 50
elif c['obs_pm10'] > 100 and c['obs_pm10'] <= 250:
return (0.66*(c['obs_pm10'] - 100)) + 100
elif c['obs_pm10'] > 250 and c['obs_pm10'] <= 350:
return ((c['obs_pm10'] - 250)) + 200
elif c['obs_pm10'] > 350 and c['obs_pm10'] <= 430:
return (1.25*(c['obs_pm10'] - 350)) + 300
elif c['obs_pm10'] > 430:
return (1.25*(c['obs_pm10'] - 430)) + 400
else:
return np.nan
dfmod['mod_aqi_pm2'] = dfmod.apply(mod_aqi_pm2, axis=1)
dfmod['mod_aqi_pm10'] = dfmod.apply(mod_aqi_pm10, axis=1)
dfobs['obs_aqi_pm2'] = dfobs.apply(obs_aqi_pm2, axis=1)
dfobs['obs_aqi_pm10'] = dfobs.apply(obs_aqi_pm10, axis=1)
dfmod.tail()
| mod_pm2 | mod_pm2_stdev | mod_pm10 | mod_pm10_stdev | mod_aqi_pm2 | mod_aqi_pm10 | |
|---|---|---|---|---|---|---|
| datetime | ||||||
| 2021-01-31 19:00:00 | 169.556250 | 94.367917 | 304.631250 | 181.144583 | 338.108756 | 254.631250 |
| 2021-01-31 20:00:00 | 171.562083 | 97.251250 | 308.490417 | 186.558750 | 339.651242 | 258.490417 |
| 2021-01-31 21:00:00 | 173.837083 | 98.862083 | 313.481667 | 190.334167 | 341.400717 | 263.481667 |
| 2021-01-31 22:00:00 | 178.011667 | 101.294583 | 322.102083 | 195.576667 | 344.610972 | 272.102083 |
| 2021-01-31 23:00:00 | 183.647083 | 105.107500 | 333.397917 | 202.817500 | 348.944607 | 283.397917 |
quality_mod_pm25 = []
# 0 -> good
# 1 -> satisfactory
# 2 -> moderately polluted
# 3 -> poor
# 4 -> very poor
# 5 -> severe
for aqipm25mod in dfmod['mod_aqi_pm2']:
if aqipm25mod >= 0 and aqipm25mod <= 50:
quality_mod_pm25.append(0)
elif aqipm25mod > 50 and aqipm25mod <= 100:
quality_mod_pm25.append(1)
elif aqipm25mod > 100 and aqipm25mod <= 200:
quality_mod_pm25.append(2)
elif aqipm25mod > 200 and aqipm25mod <= 300:
quality_mod_pm25.append(3)
elif aqipm25mod > 300 and aqipm25mod <= 400:
quality_mod_pm25.append(4)
# elif aqipm25mod > 400 and aqipm25mod <= 500:
# quality_mod_pm25.append(5)
else:
quality_mod_pm25.append(5)
quality_mod_pm10 = []
for aqipm10mod in dfmod['mod_aqi_pm10']:
if aqipm10mod >= 0 and aqipm10mod <= 50:
quality_mod_pm10.append(0)
elif aqipm10mod > 50 and aqipm10mod <= 100:
quality_mod_pm10.append(1)
elif aqipm10mod > 100 and aqipm10mod <= 200:
quality_mod_pm10.append(2)
elif aqipm10mod > 200 and aqipm10mod <= 300:
quality_mod_pm10.append(3)
elif aqipm10mod > 300 and aqipm10mod <= 400:
quality_mod_pm10.append(4)
# elif aqipm10mod > 400 and aqipm10mod <= 500:
# quality_mod_pm10.append(5)
else:
quality_mod_pm10.append(5)
quality_obs_pm25 = []
for aqipm25obs in dfobs['obs_aqi_pm2']:
if aqipm25obs >= 0 and aqipm25obs <= 50:
quality_obs_pm25.append(0)
elif aqipm25obs > 50 and aqipm25obs <= 100:
quality_obs_pm25.append(1)
elif aqipm25obs > 100 and aqipm25obs <= 200:
quality_obs_pm25.append(2)
elif aqipm25obs > 200 and aqipm25obs <= 300:
quality_obs_pm25.append(3)
elif aqipm25obs > 300 and aqipm25obs <= 400:
quality_obs_pm25.append(4)
# elif aqipm25obs > 400 and aqipm25obs <= 500:
# quality_obs_pm25.append(5)
else:
quality_obs_pm25.append(5)
quality_obs_pm10 = []
for aqipm10obs in dfobs['obs_aqi_pm10']:
if aqipm10obs >= 0 and aqipm10obs <= 50:
quality_obs_pm10.append(0)
elif aqipm10obs > 50 and aqipm10obs <= 100:
quality_obs_pm10.append(1)
elif aqipm10obs > 100 and aqipm10obs <= 200:
quality_obs_pm10.append(2)
elif aqipm10obs > 200 and aqipm10obs <= 300:
quality_obs_pm10.append(3)
elif aqipm10obs > 300 and aqipm10obs <= 400:
quality_obs_pm10.append(4)
# elif aqipm10obs > 400 and aqipm10obs <= 500:
# quality_obs_pm10.append(5)
else:
quality_obs_pm10.append(5)
dfmod['quality_mod_pm25'] = quality_mod_pm25
dfmod['quality_mod_pm10'] = quality_mod_pm10
dfobs['quality_obs_pm25'] = quality_obs_pm25
dfobs['quality_obs_pm10'] = quality_obs_pm10
dfmod = dfmod.dropna()
dfobs = dfobs.dropna()
df = pd.concat([dfmod, dfobs], axis=1)
df
| mod_pm2 | mod_pm2_stdev | mod_pm10 | mod_pm10_stdev | mod_aqi_pm2 | mod_aqi_pm10 | quality_mod_pm25 | quality_mod_pm10 | obs_pm2 | obs_pm2_stdev | obs_pm10 | obs_pm10_stdev | obs_aqi_pm2 | obs_aqi_pm10 | quality_obs_pm25 | quality_obs_pm10 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| datetime | ||||||||||||||||
| 2020-11-01 23:00:00 | 127.504583 | 59.183333 | 219.647500 | 118.243750 | 305.771025 | 178.967350 | 4 | 2 | 179.448750 | 66.654167 | 335.294375 | 99.067708 | 345.716089 | 285.294375 | 4 | 3 |
| 2020-11-02 00:00:00 | 124.977083 | 59.065833 | 217.360833 | 118.301250 | 303.827377 | 177.458150 | 4 | 2 | 169.091667 | 63.607917 | 322.072708 | 97.627292 | 337.751492 | 272.072708 | 4 | 3 |
| 2020-11-02 01:00:00 | 122.575833 | 59.254167 | 215.445000 | 118.876667 | 301.980816 | 176.193700 | 4 | 2 | 159.435000 | 60.396667 | 311.264375 | 95.753542 | 330.325515 | 261.264375 | 4 | 3 |
| 2020-11-02 02:00:00 | 120.275833 | 59.528750 | 213.872083 | 119.806250 | 300.212116 | 175.155575 | 4 | 2 | 150.589583 | 58.083333 | 300.331042 | 95.091875 | 323.523390 | 250.331042 | 4 | 3 |
| 2020-11-02 03:00:00 | 117.873750 | 60.251250 | 212.123333 | 121.276667 | 292.819588 | 174.001400 | 3 | 2 | 143.366667 | 56.084167 | 291.565625 | 95.260208 | 317.968967 | 241.565625 | 4 | 3 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 2021-01-31 19:00:00 | 169.556250 | 94.367917 | 304.631250 | 181.144583 | 338.108756 | 254.631250 | 4 | 3 | 163.405000 | 70.887083 | 312.015000 | 130.423750 | 333.378445 | 262.015000 | 4 | 3 |
| 2021-01-31 20:00:00 | 171.562083 | 97.251250 | 308.490417 | 186.558750 | 339.651242 | 258.490417 | 4 | 3 | 166.218750 | 72.538750 | 314.311250 | 131.777500 | 335.542219 | 264.311250 | 4 | 3 |
| 2021-01-31 21:00:00 | 173.837083 | 98.862083 | 313.481667 | 190.334167 | 341.400717 | 263.481667 | 4 | 3 | 168.468750 | 73.830417 | 315.406250 | 129.229583 | 337.272469 | 265.406250 | 4 | 3 |
| 2021-01-31 22:00:00 | 178.011667 | 101.294583 | 322.102083 | 195.576667 | 344.610972 | 272.102083 | 4 | 3 | 171.528750 | 74.962083 | 319.125000 | 131.076667 | 339.625609 | 269.125000 | 4 | 3 |
| 2021-01-31 23:00:00 | 183.647083 | 105.107500 | 333.397917 | 202.817500 | 348.944607 | 283.397917 | 4 | 3 | 174.874583 | 75.262917 | 323.839583 | 131.555833 | 342.198555 | 273.839583 | 4 | 3 |
2185 rows × 16 columns
df.shape
(2185, 16)
# Conditions for Critical (severe conditions only)
def get_critical_score(data):
temp = data.copy(deep=True)
conditions1 = [
(temp['quality_mod_pm25'] == 5) & (temp['quality_obs_pm25'] == 5),
(temp['quality_mod_pm25'] == 5) & (temp['quality_obs_pm25'] != 5),
(temp['quality_mod_pm25'] != 5) & (temp['quality_obs_pm25'] == 5),
(temp['quality_mod_pm25'] != 5) & (temp['quality_obs_pm25'] != 5)]
choices1 = ['a', 'b', 'c', 'd']
conditions2 = [
(temp['quality_mod_pm10'] == 5) & (temp['quality_obs_pm10'] == 5),
(temp['quality_mod_pm10'] == 5) & (temp['quality_obs_pm10'] != 5),
(temp['quality_mod_pm10'] != 5) & (temp['quality_obs_pm10'] == 5),
(temp['quality_mod_pm10'] != 5) & (temp['quality_obs_pm10'] != 5)]
choices2 = ['a', 'b', 'c', 'd']
temp['category_pm25'] = np.select(conditions1, choices1, default=np.nan)
temp['category_pm10'] = np.select(conditions2, choices2, default=np.nan)
test1 = temp.groupby('category_pm25')
test2 = temp.groupby('category_pm10')
# Need to write a program to give appropriate values to a,b,c and d
list25 = []
list10 = []
for i in range(0, len(list(test1))):
list25.append(list(test1)[i][0])
for i in range(0, len(list(test2))):
list10.append(list(test2)[i][0])
result1 = all(elem in list25 for elem in ['a', 'b', 'c', 'd'])
result2 = all(elem in list25 for elem in ['a','b','c'])
result3 = all(elem in list25 for elem in ['a','b'])
result4 = all(elem in list10 for elem in ['a', 'b', 'c', 'd'])
result5 = all(elem in list10 for elem in ['a','b','c'])
result6 = all(elem in list10 for elem in ['a','b'])
if result1:
pm25dict = {"a":list(test1)[0][1].shape[0], "b":list(test1)[1][1].shape[0], \
"c":list(test1)[2][1].shape[0], "d":list(test1)[3][1].shape[0]}
elif result2:
pm25dict = {"a":list(test1)[0][1].shape[0], "b":list(test1)[1][1].shape[0], \
"c":list(test1)[2][1].shape[0], "d":0}
elif result3:
pm25dict = {"a":list(test1)[0][1].shape[0], "b":list(test1)[1][1].shape[0], "c":0, "d":0}
else:
print("Error in pm25dict. The following are the values of list25")
print(list25)
if result4:
pm10dict = {"a":list(test2)[0][1].shape[0], "b":list(test2)[1][1].shape[0], \
"c":list(test2)[2][1].shape[0], "d":list(test2)[3][1].shape[0]}
elif result5:
pm10dict = {"a":list(test2)[0][1].shape[0], "b":list(test2)[1][1].shape[0], \
"c":list(test2)[2][1].shape[0], "d":0}
elif result6:
pm10dict = {"a":list(test2)[0][1].shape[0], "b":list(test2)[1][1].shape[0], "c":0, "d":0}
else:
print("Error in pm10dict. The following are the values of list10")
print(list10)
a = pm25dict['a']
b = pm25dict['b']
c = pm25dict['c']
d = pm25dict['d']
A = ((a + d)/(a+b+c+d)) * 100 # Accuracy of events
FAR = ((b)/(a+b)) * 100 # False event forecase
POD = ((a)/(a+c)) * 100 # Expected events
CSI = ((a)/(a+b+c)) * 100 # Events and event forecasts that were hits
FOM = ((c)/(a+c))*100 # Surprise events
FOH = ((a)/(a+b))*100 # Correct event forecasts
PON = ((d)/(b+d))*100 # Expected non events
POFD = ((b)/(b+d))*100 # Unexpected non events
DFR = ((c)/(c+d))*100 # False non-event forecasts
FOCN = ((d)/(c+d))*100 # Correct non-event forecasts
TSS = ((a)/(a+c)) - ((b)/(b+d)) # True skill statistic (Expected events - unexpected non-events)
Heidke = (((a+c)*(d-b))+((a-c)*(d+b)))/(((a+c)*(c+d))+((a+b)*(b+d)))
print("Performance metrics or Skill score for Critical PM2.5 are:\n")
print("A = ", A)
print("FAR = ", FAR)
print("POD = ", POD)
print("CSI = ", CSI)
print("FOM = ", FOM)
print("FOH = ", FOH)
print("PON = ", PON)
print("POFD = ", POFD)
print("DFR = ", DFR)
print("FOCN = ", FOCN)
print("TSS = ", TSS)
print("Heidke = ", Heidke, "\n")
a = pm10dict['a']
b = pm10dict['b']
c = pm10dict['c']
d = pm10dict['d']
A = ((a + d)/(a+b+c+d)) * 100 # Accuracy of events
FAR = ((b)/(a+b)) * 100 # False event forecase
POD = ((a)/(a+c)) * 100 # Expected events
CSI = ((a)/(a+b+c)) * 100 # Events and event forecasts that were hits
FOM = ((c)/(a+c))*100 # Surprise events
FOH = ((a)/(a+b))*100 # Correct event forecasts
PON = ((d)/(b+d))*100 # Expected non events
POFD = ((b)/(b+d))*100 # Unexpected non events
DFR = ((c)/(c+d))*100 # False non-event forecasts
FOCN = ((d)/(c+d))*100 # Correct non-event forecasts
TSS = ((a)/(a+c)) - ((b)/(b+d)) # True skill statistic (Expected events - unexpected non-events)
Heidke = (((a+c)*(d-b))+((a-c)*(d+b)))/(((a+c)*(c+d))+((a+b)*(b+d)))
print("Performance metrics or Skill score for Critical PM10 are: \n")
print("A = ", A)
print("FAR = ", FAR)
print("POD = ", POD)
print("CSI = ", CSI)
print("FOM = ", FOM)
print("FOH = ", FOH)
print("PON = ", PON)
print("POFD = ", POFD)
print("DFR = ", DFR)
print("FOCN = ", FOCN)
print("TSS = ", TSS)
print("Heidke = ", Heidke)
get_critical_score(df)
Performance metrics or Skill score for Critical PM2.5 are: A = 89.65675057208237 FAR = 25.423728813559322 POD = 59.299191374663074 CSI = 49.327354260089685 FOM = 40.700808625336926 FOH = 74.57627118644068 PON = 95.86549062844543 POFD = 4.134509371554575 DFR = 7.989417989417989 FOCN = 92.01058201058201 TSS = 0.551646820031085 Heidke = 0.6005807557913809 Performance metrics or Skill score for Critical PM10 are: A = 88.69565217391305 FAR = 35.68904593639576 POD = 55.487804878048784 CSI = 42.42424242424242 FOM = 44.51219512195122 FOH = 64.31095406360424 PON = 94.56112008616047 POFD = 5.438879913839526 DFR = 7.676130389064142 FOCN = 92.32386961093586 TSS = 0.5004892496420926 Heidke = 0.5304497092798162
# Conditions for Critical (severe conditions only)
def get_veryunhealthy_score(data):
temp = data.copy(deep=True)
conditions1 = [
(temp['quality_mod_pm25'] >= 4) & (temp['quality_obs_pm25'] >= 4),
(temp['quality_mod_pm25'] >= 4) & (temp['quality_obs_pm25'] < 4),
(temp['quality_mod_pm25'] < 4) & (temp['quality_obs_pm25'] >= 4),
(temp['quality_mod_pm25'] < 4) & (temp['quality_obs_pm25'] < 4)]
choices1 = ['a', 'b', 'c', 'd']
conditions2 = [
(temp['quality_mod_pm10'] >= 4) & (temp['quality_obs_pm10'] >= 4),
(temp['quality_mod_pm10'] >= 4) & (temp['quality_obs_pm10'] < 4),
(temp['quality_mod_pm10'] < 4) & (temp['quality_obs_pm10'] >= 4),
(temp['quality_mod_pm10'] < 4) & (temp['quality_obs_pm10'] < 4)]
choices2 = ['a', 'b', 'c', 'd']
temp['category_pm25'] = np.select(conditions1, choices1, default=np.nan)
temp['category_pm10'] = np.select(conditions2, choices2, default=np.nan)
test1 = temp.groupby('category_pm25')
test2 = temp.groupby('category_pm10')
# Need to write a program to give appropriate values to a,b,c and d
list25 = []
list10 = []
for i in range(0, len(list(test1))):
list25.append(list(test1)[i][0])
for i in range(0, len(list(test2))):
list10.append(list(test2)[i][0])
result1 = all(elem in list25 for elem in ['a', 'b', 'c', 'd'])
result2 = all(elem in list25 for elem in ['a','b','c'])
result3 = all(elem in list25 for elem in ['a','b'])
result4 = all(elem in list10 for elem in ['a', 'b', 'c', 'd'])
result5 = all(elem in list10 for elem in ['a','b','c'])
result6 = all(elem in list10 for elem in ['a','b'])
if result1:
pm25dict = {"a":list(test1)[0][1].shape[0], "b":list(test1)[1][1].shape[0], \
"c":list(test1)[2][1].shape[0], "d":list(test1)[3][1].shape[0]}
elif result2:
pm25dict = {"a":list(test1)[0][1].shape[0], "b":list(test1)[1][1].shape[0], \
"c":list(test1)[2][1].shape[0], "d":0}
elif result3:
pm25dict = {"a":list(test1)[0][1].shape[0], "b":list(test1)[1][1].shape[0], "c":0, "d":0}
else:
print("Error in pm25dict. The following are the values of list25")
print(list25)
if result4:
pm10dict = {"a":list(test2)[0][1].shape[0], "b":list(test2)[1][1].shape[0], \
"c":list(test2)[2][1].shape[0], "d":list(test2)[3][1].shape[0]}
elif result5:
pm10dict = {"a":list(test2)[0][1].shape[0], "b":list(test2)[1][1].shape[0], \
"c":list(test2)[2][1].shape[0], "d":0}
elif result6:
pm10dict = {"a":list(test2)[0][1].shape[0], "b":list(test2)[1][1].shape[0], "c":0, "d":0}
else:
print("Error in pm10dict. The following are the values of list10")
print(list10)
a = pm25dict['a']
b = pm25dict['b']
c = pm25dict['c']
d = pm25dict['d']
A = ((a + d)/(a+b+c+d)) * 100 # Accuracy of events
FAR = ((b)/(a+b)) * 100 # False event forecase
POD = ((a)/(a+c)) * 100 # Expected events
CSI = ((a)/(a+b+c)) * 100 # Events and event forecasts that were hits
FOM = ((c)/(a+c))*100 # Surprise events
FOH = ((a)/(a+b))*100 # Correct event forecasts
PON = ((d)/(b+d))*100 # Expected non events
POFD = ((b)/(b+d))*100 # Unexpected non events
DFR = ((c)/(c+d))*100 # False non-event forecasts
FOCN = ((d)/(c+d))*100 # Correct non-event forecasts
TSS = ((a)/(a+c)) - ((b)/(b+d)) # True skill statistic (Expected events - unexpected non-events)
Heidke = (((a+c)*(d-b))+((a-c)*(d+b)))/(((a+c)*(c+d))+((a+b)*(b+d)))
print("Performance metrics or Skill score for Very Unhealthy PM2.5 are:\n")
print("A = ", A)
print("FAR = ", FAR)
print("POD = ", POD)
print("CSI = ", CSI)
print("FOM = ", FOM)
print("FOH = ", FOH)
print("PON = ", PON)
print("POFD = ", POFD)
print("DFR = ", DFR)
print("FOCN = ", FOCN)
print("TSS = ", TSS)
print("Heidke = ", Heidke, "\n")
a = pm10dict['a']
b = pm10dict['b']
c = pm10dict['c']
d = pm10dict['d']
A = ((a + d)/(a+b+c+d)) * 100 # Accuracy of events
FAR = ((b)/(a+b)) * 100 # False event forecase
POD = ((a)/(a+c)) * 100 # Expected events
CSI = ((a)/(a+b+c)) * 100 # Events and event forecasts that were hits
FOM = ((c)/(a+c))*100 # Surprise events
FOH = ((a)/(a+b))*100 # Correct event forecasts
PON = ((d)/(b+d))*100 # Expected non events
POFD = ((b)/(b+d))*100 # Unexpected non events
DFR = ((c)/(c+d))*100 # False non-event forecasts
FOCN = ((d)/(c+d))*100 # Correct non-event forecasts
TSS = ((a)/(a+c)) - ((b)/(b+d)) # True skill statistic (Expected events - unexpected non-events)
Heidke = (((a+c)*(d-b))+((a-c)*(d+b)))/(((a+c)*(c+d))+((a+b)*(b+d)))
print("Performance metrics or Skill score for Very Unhealthy PM10 are: \n")
print("A = ", A)
print("FAR = ", FAR)
print("POD = ", POD)
print("CSI = ", CSI)
print("FOM = ", FOM)
print("FOH = ", FOH)
print("PON = ", PON)
print("POFD = ", POFD)
print("DFR = ", DFR)
print("FOCN = ", FOCN)
print("TSS = ", TSS)
print("Heidke = ", Heidke)
get_veryunhealthy_score(df)
Performance metrics or Skill score for Very Unhealthy PM2.5 are: A = 78.53546910755149 FAR = 15.626725565985645 POD = 89.14819136522753 CSI = 76.51477215823735 FOM = 10.851808634772462 FOH = 84.37327443401436 PON = 39.91507430997877 POFD = 60.08492569002123 DFR = 49.73262032085562 FOCN = 50.26737967914438 TSS = 0.29063265675206307 Heidke = 0.3140874568361672 Performance metrics or Skill score for Very Unhealthy PM10 are: A = 79.67963386727689 FAR = 30.71324599708879 POD = 67.13681241184767 CSI = 51.73913043478261 FOM = 32.86318758815233 FOH = 69.28675400291121 PON = 85.70460704607046 POFD = 14.29539295392954 DFR = 15.554072096128172 FOCN = 84.44592790387183 TSS = 0.5284141945791813 Heidke = 0.5327090199191367
# Conditions for Unhealthy (poor, very poor, severe conditions only)
def get_unhealthy_score(data):
temp = data.copy(deep=True)
conditions1 = [
(temp['quality_mod_pm25'] >= 3) & (temp['quality_obs_pm25'] >= 3),
(temp['quality_mod_pm25'] >= 3) & (temp['quality_obs_pm25'] < 3),
(temp['quality_mod_pm25'] < 3) & (temp['quality_obs_pm25'] >= 3),
(temp['quality_mod_pm25'] < 3) & (temp['quality_obs_pm25'] < 3)]
choices1 = ['a', 'b', 'c', 'd']
conditions2 = [
(temp['quality_mod_pm10'] >= 3) & (temp['quality_obs_pm10'] >= 3),
(temp['quality_mod_pm10'] >= 3) & (temp['quality_obs_pm10'] < 3),
(temp['quality_mod_pm10'] < 3) & (temp['quality_obs_pm10'] >= 3),
(temp['quality_mod_pm10'] < 3) & (temp['quality_obs_pm10'] < 3)]
choices2 = ['a', 'b', 'c', 'd']
temp['category_pm25'] = np.select(conditions1, choices1, default=np.nan)
temp['category_pm10'] = np.select(conditions2, choices2, default=np.nan)
test1 = temp.groupby('category_pm25')
test2 = temp.groupby('category_pm10')
# Need to write a program to give appropriate values to a,b,c and d
list25 = []
list10 = []
for i in range(0, len(list(test1))):
list25.append(list(test1)[i][0])
for i in range(0, len(list(test2))):
list10.append(list(test2)[i][0])
result1 = all(elem in list25 for elem in ['a', 'b', 'c', 'd'])
result2 = all(elem in list25 for elem in ['a','b','c'])
result3 = all(elem in list25 for elem in ['a','b'])
result4 = all(elem in list10 for elem in ['a', 'b', 'c', 'd'])
result5 = all(elem in list10 for elem in ['a','b','c'])
result6 = all(elem in list10 for elem in ['a','b'])
if result1:
pm25dict = {"a":list(test1)[0][1].shape[0], "b":list(test1)[1][1].shape[0], \
"c":list(test1)[2][1].shape[0], "d":list(test1)[3][1].shape[0]}
elif result2:
pm25dict = {"a":list(test1)[0][1].shape[0], "b":list(test1)[1][1].shape[0], \
"c":list(test1)[2][1].shape[0], "d":0}
elif result3:
pm25dict = {"a":list(test1)[0][1].shape[0], "b":list(test1)[1][1].shape[0], "c":0, "d":0}
else:
print("Error in pm25dict. The following are the values of list25")
print(list25)
if result4:
pm10dict = {"a":list(test2)[0][1].shape[0], "b":list(test2)[1][1].shape[0], \
"c":list(test2)[2][1].shape[0], "d":list(test2)[3][1].shape[0]}
elif result5:
pm10dict = {"a":list(test2)[0][1].shape[0], "b":list(test2)[1][1].shape[0], \
"c":list(test2)[2][1].shape[0], "d":0}
elif result6:
pm10dict = {"a":list(test2)[0][1].shape[0], "b":list(test2)[1][1].shape[0], "c":0, "d":0}
else:
print("Error in pm10dict. The following are the values of list10")
print(list10)
a = pm25dict['a']
b = pm25dict['b']
c = pm25dict['c']
d = pm25dict['d']
A = ((a + d)/(a+b+c+d)) * 100 # Accuracy of events
FAR = ((b)/(a+b)) * 100 # False event forecase
POD = ((a)/(a+c)) * 100 # Expected events
CSI = ((a)/(a+b+c)) * 100 # Events and event forecasts that were hits
FOM = ((c)/(a+c))*100 # Surprise events
FOH = ((a)/(a+b))*100 # Correct event forecasts
PON = ((d)/(b+d))*100 # Expected non events
POFD = ((b)/(b+d))*100 # Unexpected non events
try:
DFR = ((c)/(c+d))*100 # False non-event forecasts
FOCN = ((d)/(c+d))*100 # Correct non-event forecasts
pointer = 0
except ZeroDivisionError:
print ("Zero Division Error occurred, both c and d are zero")
pointer = 1
TSS = ((a)/(a+c)) - ((b)/(b+d)) # True skill statistic (Expected events - unexpected non-events)
Heidke = (((a+c)*(d-b))+((a-c)*(d+b)))/(((a+c)*(c+d))+((a+b)*(b+d)))
print("Performance metrics or Skill score for Unhealthy PM2.5 are:\n")
print("A = ", A)
print("FAR = ", FAR)
print("POD = ", POD)
print("CSI = ", CSI)
print("FOM = ", FOM)
print("FOH = ", FOH)
print("PON = ", PON)
print("POFD = ", POFD)
if pointer == 1:
print("DFR = infinite")
print("FOCN = inifinte")
elif pointer == 0:
print("DFR = ", DFR)
print("FOCN = ", FOCN)
print("TSS = ", TSS)
print("Heidke = ", Heidke, "\n")
a = pm10dict['a']
b = pm10dict['b']
c = pm10dict['c']
d = pm10dict['d']
A = ((a + d)/(a+b+c+d)) * 100 # Accuracy of events
FAR = ((b)/(a+b)) * 100 # False event forecase
POD = ((a)/(a+c)) * 100 # Expected events
CSI = ((a)/(a+b+c)) * 100 # Events and event forecasts that were hits
FOM = ((c)/(a+c))*100 # Surprise events
FOH = ((a)/(a+b))*100 # Correct event forecasts
PON = ((d)/(b+d))*100 # Expected non events
POFD = ((b)/(b+d))*100 # Unexpected non events
DFR = ((c)/(c+d))*100 # False non-event forecasts
FOCN = ((d)/(c+d))*100 # Correct non-event forecasts
TSS = ((a)/(a+c)) - ((b)/(b+d)) # True skill statistic (Expected events - unexpected non-events)
Heidke = (((a+c)*(d-b))+((a-c)*(d+b)))/(((a+c)*(c+d))+((a+b)*(b+d)))
print("Performance metrics or Skill score for Unhealthy PM10 are: \n")
print("A = ", A)
print("FAR = ", FAR)
print("POD = ", POD)
print("CSI = ", CSI)
print("FOM = ", FOM)
print("FOH = ", FOH)
print("PON = ", PON)
print("POFD = ", POFD)
print("DFR = ", DFR)
print("FOCN = ", FOCN)
print("TSS = ", TSS)
print("Heidke = ", Heidke)
get_unhealthy_score(df)
Performance metrics or Skill score for Unhealthy PM2.5 are: A = 91.89931350114416 FAR = 8.016491067338526 POD = 99.90049751243781 CSI = 91.89931350114416 FOM = 0.09950248756218905 FOH = 91.98350893266148 PON = 0.0 POFD = 100.0 DFR = 100.0 FOCN = 0.0 TSS = -0.0009950248756218638 Heidke = -0.0018132601121630897 Performance metrics or Skill score for Unhealthy PM10 are: A = 82.92906178489703 FAR = 12.761647535449022 POD = 87.53387533875339 CSI = 77.5975975975976 FOM = 12.466124661246612 FOH = 87.23835246455099 PON = 73.34273624823695 POFD = 26.657263751763043 DFR = 26.136363636363637 FOCN = 73.86363636363636 TSS = 0.6087661158699034 Heidke = 0.6098836215789037
## Single function to obtain the statistical performance measures
def get_stat_performance(df):
# mean bias
mb_pm25 = (df['mod_pm2'] - df['obs_pm2']).describe()[1]
mb_pm10 = (df['mod_pm10'] - df['obs_pm10']).describe()[1]
mb_aqi25 = (df['mod_aqi_pm2'] - df['obs_aqi_pm2']).describe()[1]
mb_aqi10 = (df['mod_aqi_pm10'] - df['obs_aqi_pm10']).describe()[1]
print("Mean bias :")
print("mean bias pm2.5 = ", mb_pm25)
print("mean bias pm10 = ", mb_pm10)
print("mean bias aqi_pm2.5 = ", mb_aqi25)
print("mean bias aqi_pm10 = ", mb_aqi10, "\n")
# fractional bias
fb_pm25 = 2 * ((df['mod_pm2'].describe()[1] - df['obs_pm2'].describe()[1]) / \
(df['mod_pm2'].describe()[1] + df['obs_pm2'].describe()[1]))
fb_pm10 = 2 * ((df['mod_pm10'].describe()[1] - df['obs_pm10'].describe()[1]) / \
(df['mod_pm10'].describe()[1] + df['obs_pm10'].describe()[1]))
fb_aqi25 = 2 * ((df['mod_aqi_pm2'].describe()[1] - df['obs_aqi_pm2'].describe()[1]) / \
(df['mod_aqi_pm2'].describe()[1] + df['obs_aqi_pm2'].describe()[1]))
fb_aqi10 = 2 * ((df['mod_aqi_pm10'].describe()[1] - df['obs_aqi_pm10'].describe()[1]) / \
(df['mod_aqi_pm10'].describe()[1] + df['obs_aqi_pm10'].describe()[1]))
print("fractional bias is :")
print("fractional bias pm2.5 = ", fb_pm25)
print("fractional bias pm10 = ", fb_pm10)
print("fractional bias aqi_pm2.5 = ", fb_aqi25)
print("fractional bias aqi_pm10 = ", fb_aqi10, "\n")
# Pearson's Corr Coeff
r_pm25 = ((df['obs_pm2'] - df['obs_pm2'].describe()[1]) * \
(df['mod_pm2'] - df['mod_pm2'].describe()[1])).describe()[1] / (df['mod_pm2'].describe()[2] * \
df['obs_pm2'].describe()[2])
r_pm10 = ((df['obs_pm10'] - df['obs_pm10'].describe()[1]) * \
(df['mod_pm10'] - df['mod_pm10'].describe()[1])).describe()[1] / (df['mod_pm10'].describe()[2] * \
df['obs_pm10'].describe()[2])
r_aqi25 = ((df['obs_aqi_pm2'] - df['obs_aqi_pm2'].describe()[1]) * \
(df['mod_aqi_pm2'] - df['mod_aqi_pm2'].describe()[1])).describe()[1] / (df['mod_aqi_pm2'].describe()[2] * df['obs_aqi_pm2'].describe()[2])
r_aqi10 = ((df['obs_aqi_pm10'] - df['obs_aqi_pm10'].describe()[1]) * \
(df['mod_aqi_pm10'] - df['mod_aqi_pm10'].describe()[1])).describe()[1] / (df['mod_aqi_pm10'].describe()[2] * df['obs_aqi_pm10'].describe()[2])
print("Correlation coefficient is :")
print("corr coeff pm2.5 = ", r_pm25)
print("corr coeff pm10 = ", r_pm10)
print("corr coeff aqi_pm2.5 = ", r_aqi25)
print("corr coeff aqi_pm10 = ", r_aqi10, "\n")
# RMSE
rmse_pm25 = np.sqrt(((df['mod_pm2'] - df['obs_pm2'])*(df['mod_pm2'] - df['obs_pm2'])).describe()[1])
rmse_pm10 = np.sqrt(((df['mod_pm10'] - df['obs_pm10'])*(df['mod_pm10'] - df['obs_pm10'])).describe()[1])
rmse_aqi25 = np.sqrt(((df['mod_aqi_pm2'] - df['obs_aqi_pm2'])*\
(df['mod_aqi_pm2'] - df['obs_aqi_pm2'])).describe()[1])
rmse_aqi10 = np.sqrt(((df['mod_aqi_pm10'] - df['obs_aqi_pm10'])*\
(df['mod_aqi_pm10'] - df['obs_aqi_pm10'])).describe()[1])
print("RMSE is :")
print("RMSE pm2.5 = ", rmse_pm25)
print("RMSE pm10 = ", rmse_pm10)
print("RMSE aqi_pm2.5 = ", rmse_aqi25)
print("RMSE aqi_pm10 = ", rmse_aqi10, "\n")
get_stat_performance(df)
Mean bias : mean bias pm2.5 = -2.065362128146454 mean bias pm10 = 5.371324180015254 mean bias aqi_pm2.5 = 7.81858830650743 mean bias aqi_pm10 = 2.2136541237604894 fractional bias is : fractional bias pm2.5 = -0.01132377682849823 fractional bias pm10 = 0.01727653692075466 fractional bias aqi_pm2.5 = 0.02306909479367706 fractional bias aqi_pm10 = 0.008097028076129106 Correlation coefficient is : corr coeff pm2.5 = 0.8016741851521504 corr coeff pm10 = 0.7734556076842606 corr coeff aqi_pm2.5 = 0.6962973879387541 corr coeff aqi_pm10 = 0.7692256648906234 RMSE is : RMSE pm2.5 = 48.17746556522817 RMSE pm10 = 74.81548023404702 RMSE aqi_pm2.5 = 61.480178362768186 RMSE aqi_pm10 = 79.70818576556577
df
| mod_pm2 | mod_pm2_stdev | mod_pm10 | mod_pm10_stdev | mod_aqi_pm2 | mod_aqi_pm10 | quality_mod_pm25 | quality_mod_pm10 | obs_pm2 | obs_pm2_stdev | obs_pm10 | obs_pm10_stdev | obs_aqi_pm2 | obs_aqi_pm10 | quality_obs_pm25 | quality_obs_pm10 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| datetime | ||||||||||||||||
| 2020-11-01 23:00:00 | 127.504583 | 59.183333 | 219.647500 | 118.243750 | 305.771025 | 178.967350 | 4 | 2 | 179.448750 | 66.654167 | 335.294375 | 99.067708 | 345.716089 | 285.294375 | 4 | 3 |
| 2020-11-02 00:00:00 | 124.977083 | 59.065833 | 217.360833 | 118.301250 | 303.827377 | 177.458150 | 4 | 2 | 169.091667 | 63.607917 | 322.072708 | 97.627292 | 337.751492 | 272.072708 | 4 | 3 |
| 2020-11-02 01:00:00 | 122.575833 | 59.254167 | 215.445000 | 118.876667 | 301.980816 | 176.193700 | 4 | 2 | 159.435000 | 60.396667 | 311.264375 | 95.753542 | 330.325515 | 261.264375 | 4 | 3 |
| 2020-11-02 02:00:00 | 120.275833 | 59.528750 | 213.872083 | 119.806250 | 300.212116 | 175.155575 | 4 | 2 | 150.589583 | 58.083333 | 300.331042 | 95.091875 | 323.523390 | 250.331042 | 4 | 3 |
| 2020-11-02 03:00:00 | 117.873750 | 60.251250 | 212.123333 | 121.276667 | 292.819588 | 174.001400 | 3 | 2 | 143.366667 | 56.084167 | 291.565625 | 95.260208 | 317.968967 | 241.565625 | 4 | 3 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 2021-01-31 19:00:00 | 169.556250 | 94.367917 | 304.631250 | 181.144583 | 338.108756 | 254.631250 | 4 | 3 | 163.405000 | 70.887083 | 312.015000 | 130.423750 | 333.378445 | 262.015000 | 4 | 3 |
| 2021-01-31 20:00:00 | 171.562083 | 97.251250 | 308.490417 | 186.558750 | 339.651242 | 258.490417 | 4 | 3 | 166.218750 | 72.538750 | 314.311250 | 131.777500 | 335.542219 | 264.311250 | 4 | 3 |
| 2021-01-31 21:00:00 | 173.837083 | 98.862083 | 313.481667 | 190.334167 | 341.400717 | 263.481667 | 4 | 3 | 168.468750 | 73.830417 | 315.406250 | 129.229583 | 337.272469 | 265.406250 | 4 | 3 |
| 2021-01-31 22:00:00 | 178.011667 | 101.294583 | 322.102083 | 195.576667 | 344.610972 | 272.102083 | 4 | 3 | 171.528750 | 74.962083 | 319.125000 | 131.076667 | 339.625609 | 269.125000 | 4 | 3 |
| 2021-01-31 23:00:00 | 183.647083 | 105.107500 | 333.397917 | 202.817500 | 348.944607 | 283.397917 | 4 | 3 | 174.874583 | 75.262917 | 323.839583 | 131.555833 | 342.198555 | 273.839583 | 4 | 3 |
2185 rows × 16 columns
# Extracting the separate values for the winter months separately
dfnov = df.loc['2020-11-01':'2020-11-30']
dfdec = df.loc['2020-12-01':'2020-12-31']
dfjan = df.loc['2021-01-01':'2021-01-31']
dfnov
| mod_pm2 | mod_pm2_stdev | mod_pm10 | mod_pm10_stdev | mod_aqi_pm2 | mod_aqi_pm10 | quality_mod_pm25 | quality_mod_pm10 | obs_pm2 | obs_pm2_stdev | obs_pm10 | obs_pm10_stdev | obs_aqi_pm2 | obs_aqi_pm10 | quality_obs_pm25 | quality_obs_pm10 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| datetime | ||||||||||||||||
| 2020-11-01 23:00:00 | 127.504583 | 59.183333 | 219.647500 | 118.243750 | 305.771025 | 178.967350 | 4 | 2 | 179.448750 | 66.654167 | 335.294375 | 99.067708 | 345.716089 | 285.294375 | 4 | 3 |
| 2020-11-02 00:00:00 | 124.977083 | 59.065833 | 217.360833 | 118.301250 | 303.827377 | 177.458150 | 4 | 2 | 169.091667 | 63.607917 | 322.072708 | 97.627292 | 337.751492 | 272.072708 | 4 | 3 |
| 2020-11-02 01:00:00 | 122.575833 | 59.254167 | 215.445000 | 118.876667 | 301.980816 | 176.193700 | 4 | 2 | 159.435000 | 60.396667 | 311.264375 | 95.753542 | 330.325515 | 261.264375 | 4 | 3 |
| 2020-11-02 02:00:00 | 120.275833 | 59.528750 | 213.872083 | 119.806250 | 300.212116 | 175.155575 | 4 | 2 | 150.589583 | 58.083333 | 300.331042 | 95.091875 | 323.523390 | 250.331042 | 4 | 3 |
| 2020-11-02 03:00:00 | 117.873750 | 60.251250 | 212.123333 | 121.276667 | 292.819588 | 174.001400 | 3 | 2 | 143.366667 | 56.084167 | 291.565625 | 95.260208 | 317.968967 | 241.565625 | 4 | 3 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 2020-11-30 19:00:00 | 186.157917 | 107.792083 | 344.002500 | 215.190000 | 350.875438 | 294.002500 | 4 | 3 | 172.614167 | 62.624167 | 316.977083 | 105.162083 | 340.460294 | 266.977083 | 4 | 3 |
| 2020-11-30 20:00:00 | 186.961667 | 106.740417 | 344.153333 | 212.838750 | 351.493522 | 294.153333 | 4 | 3 | 174.405417 | 63.549167 | 319.342500 | 106.252500 | 341.837765 | 269.342500 | 4 | 3 |
| 2020-11-30 21:00:00 | 188.347500 | 106.011250 | 345.242500 | 210.727500 | 352.559228 | 295.242500 | 4 | 3 | 176.128333 | 63.925417 | 321.950417 | 107.049167 | 343.162688 | 271.950417 | 4 | 3 |
| 2020-11-30 22:00:00 | 190.129583 | 105.678750 | 346.928333 | 209.244583 | 353.929650 | 296.928333 | 4 | 3 | 177.682500 | 64.547500 | 324.535417 | 107.892500 | 344.357843 | 274.535417 | 4 | 3 |
| 2020-11-30 23:00:00 | 191.920833 | 105.255833 | 348.549167 | 207.751667 | 355.307121 | 298.549167 | 4 | 3 | 179.240833 | 64.936667 | 327.202500 | 108.555833 | 345.556201 | 277.202500 | 4 | 3 |
697 rows × 16 columns
November
get_critical_score(dfnov)
Performance metrics or Skill score for Critical PM2.5 are: A = 90.53084648493544 FAR = 15.950920245398773 POD = 77.40112994350282 CSI = 67.48768472906403 FOM = 22.598870056497177 FOH = 84.04907975460122 PON = 95.0 POFD = 5.0 DFR = 7.490636704119851 FOCN = 92.50936329588015 TSS = 0.7240112994350282 Heidke = 0.7434040986624125 Performance metrics or Skill score for Critical PM10 are: A = 86.80057388809182 FAR = 17.333333333333336 POD = 65.26315789473685 CSI = 57.407407407407405 FOM = 34.73684210526316 FOH = 82.66666666666667 PON = 94.87179487179486 POFD = 5.128205128205128 DFR = 12.065813528336381 FOCN = 87.93418647166362 TSS = 0.6013495276653171 Heidke = 0.6437159684409379
get_veryunhealthy_score(dfnov)
Performance metrics or Skill score for Very Unhealthy PM2.5 are: A = 79.62697274031564 FAR = 18.016528925619834 POD = 93.76181474480151 CSI = 77.74294670846395 FOM = 6.238185255198488 FOH = 81.98347107438016 PON = 35.11904761904761 POFD = 64.88095238095238 DFR = 35.869565217391305 FOCN = 64.13043478260869 TSS = 0.28880862363849125 Heidke = 0.3415254011762514 Performance metrics or Skill score for Very Unhealthy PM10 are: A = 87.66140602582496 FAR = 20.189274447949526 POD = 92.0 CSI = 74.63126843657817 FOM = 8.0 FOH = 79.81072555205047 PON = 84.83412322274881 POFD = 15.165876777251185 DFR = 5.7894736842105265 FOCN = 94.21052631578948 TSS = 0.7683412322274882 Heidke = 0.7484324768963462
get_unhealthy_score(dfnov)
Zero Division Error occurred, both c and d are zero Performance metrics or Skill score for Unhealthy PM2.5 are: A = 87.51793400286944 FAR = 12.48206599713056 POD = 100.0 CSI = 87.51793400286944 FOM = 0.0 FOH = 87.51793400286944 PON = 0.0 POFD = 100.0 DFR = infinite FOCN = inifinte TSS = 0.0 Heidke = 0.0 Performance metrics or Skill score for Unhealthy PM10 are: A = 83.78766140602582 FAR = 14.717741935483872 POD = 91.36069114470843 CSI = 78.9179104477612 FOM = 8.639308855291576 FOH = 85.28225806451613 PON = 68.80341880341881 POFD = 31.196581196581196 DFR = 19.900497512437813 FOCN = 80.09950248756219 TSS = 0.6016410994812723 Heidke = 0.6233819640696802
get_stat_performance(dfnov)
Mean bias : mean bias pm2.5 = 0.03189383070300609 mean bias pm10 = 6.357450382592061 mean bias aqi_pm2.5 = 16.448663093809785 mean bias aqi_pm10 = 0.43214548660927865 fractional bias is : fractional bias pm2.5 = 0.00016229163379654051 fractional bias pm10 = 0.018620332377533785 fractional bias aqi_pm2.5 = 0.0472993153453743 fractional bias aqi_pm10 = 0.0013984745962575572 Correlation coefficient is : corr coeff pm2.5 = 0.8827578732017671 corr coeff pm10 = 0.859597493091373 corr coeff aqi_pm2.5 = 0.7921594001849241 corr coeff aqi_pm10 = 0.859960123903495 RMSE is : RMSE pm2.5 = 52.67309806814286 RMSE pm10 = 74.14887472985313 RMSE aqi_pm2.5 = 73.01398649416242 RMSE aqi_pm10 = 79.76534377541405
December
get_critical_score(dfdec)
Performance metrics or Skill score for Critical PM2.5 are: A = 86.96236559139786 FAR = 64.86486486486487 POD = 34.66666666666667 CSI = 21.138211382113823 FOM = 65.33333333333333 FOH = 35.13513513513514 PON = 92.82511210762333 POFD = 7.174887892376682 DFR = 7.313432835820896 FOCN = 92.6865671641791 TSS = 0.2749177877428999 Heidke = 0.2765547936966197 Performance metrics or Skill score for Critical PM10 are: A = 85.48387096774194 FAR = 77.52808988764045 POD = 33.89830508474576 CSI = 15.625 FOM = 66.10169491525424 FOH = 22.47191011235955 PON = 89.92700729927007 POFD = 10.072992700729927 DFR = 5.9541984732824424 FOCN = 94.04580152671755 TSS = 0.23825312384015834 Heidke = 0.1933340026101797
get_veryunhealthy_score(dfdec)
Performance metrics or Skill score for Very Unhealthy PM2.5 are: A = 77.28494623655914 FAR = 12.131715771230503 POD = 83.66336633663366 CSI = 75.0 FOM = 16.33663366336634 FOH = 87.8682842287695 PON = 49.275362318840585 POFD = 50.72463768115942 DFR = 59.2814371257485 FOCN = 40.7185628742515 TSS = 0.32938728655474236 Heidke = 0.30466520671577413 Performance metrics or Skill score for Very Unhealthy PM10 are: A = 70.43010752688173 FAR = 41.702127659574465 POD = 52.8957528957529 CSI = 38.37535014005603 FOM = 47.10424710424711 FOH = 58.29787234042553 PON = 79.79381443298969 POFD = 20.20618556701031 DFR = 23.968565815324165 FOCN = 76.03143418467585 TSS = 0.3268956732874259 Heidke = 0.3341090127987112
get_unhealthy_score(dfdec)
Performance metrics or Skill score for Unhealthy PM2.5 are: A = 95.16129032258065 FAR = 4.5822102425876015 POD = 99.71830985915493 CSI = 95.16129032258065 FOM = 0.28169014084507044 FOH = 95.4177897574124 PON = 0.0 POFD = 100.0 DFR = 100.0 FOCN = 0.0 TSS = -0.0028169014084507005 Heidke = -0.005103572500750525 Performance metrics or Skill score for Unhealthy PM10 are: A = 82.39247311827957 FAR = 8.13953488372093 POD = 84.19182948490231 CSI = 78.34710743801652 FOM = 15.808170515097691 FOH = 91.86046511627907 PON = 76.79558011049724 POFD = 23.204419889502763 DFR = 39.03508771929825 FOCN = 60.96491228070175 TSS = 0.6098740959539956 Heidke = 0.5604978354978355
get_stat_performance(dfdec)
Mean bias : mean bias pm2.5 = -1.1914572132616468 mean bias pm10 = 4.230082325268838 mean bias aqi_pm2.5 = 0.7878714045698951 mean bias aqi_pm10 = 6.593466887880833 fractional bias is : fractional bias pm2.5 = -0.006656020721939076 fractional bias pm10 = 0.013587520743564936 fractional bias aqi_pm2.5 = 0.002330534678576087 fractional bias aqi_pm10 = 0.024326974825282274 Correlation coefficient is : corr coeff pm2.5 = 0.6609465950517727 corr coeff pm10 = 0.6221830256537176 corr coeff aqi_pm2.5 = 0.6472942115837137 corr coeff aqi_pm10 = 0.5592858674618498 RMSE is : RMSE pm2.5 = 48.21677786957834 RMSE pm10 = 82.05084226202382 RMSE aqi_pm2.5 = 48.56666665296193 RMSE aqi_pm10 = 93.24747657785929
January
get_critical_score(dfjan)
Performance metrics or Skill score for Critical PM2.5 are: A = 91.53225806451613 FAR = 1.7241379310344827 POD = 47.89915966386555 CSI = 47.5 FOM = 52.10084033613446 FOH = 98.27586206896551 PON = 99.83999999999999 POFD = 0.16 DFR = 9.037900874635568 FOCN = 90.96209912536443 TSS = 0.4773915966386555 Heidke = 0.6023887889789963 Performance metrics or Skill score for Critical PM10 are: A = 93.68279569892472 FAR = 13.636363636363635 POD = 48.10126582278481 CSI = 44.70588235294118 FOM = 51.89873417721519 FOH = 86.36363636363636 PON = 99.09774436090225 POFD = 0.9022556390977444 DFR = 5.857142857142858 FOCN = 94.14285714285714 TSS = 0.4719901018368707 Heidke = 0.5864711447492904
get_veryunhealthy_score(dfjan)
Performance metrics or Skill score for Very Unhealthy PM2.5 are: A = 78.76344086021506 FAR = 16.534181240063592 POD = 90.67357512953367 CSI = 76.86676427525623 FOM = 9.32642487046632 FOH = 83.46581875993641 PON = 36.96969696969697 POFD = 63.030303030303024 DFR = 46.95652173913044 FOCN = 53.04347826086957 TSS = 0.27643272099230654 Heidke = 0.31001936960732523 Performance metrics or Skill score for Very Unhealthy PM10 are: A = 81.45161290322581 FAR = 36.2962962962963 POD = 49.142857142857146 CSI = 38.392857142857146 FOM = 50.857142857142854 FOH = 63.70370370370371 PON = 91.3884007029877 POFD = 8.611599297012303 DFR = 14.614121510673234 FOCN = 85.38587848932676 TSS = 0.4053125784584484 Heidke = 0.4401439555046622
get_unhealthy_score(dfjan)
Zero Division Error occurred, both c and d are zero Performance metrics or Skill score for Unhealthy PM2.5 are: A = 92.74193548387096 FAR = 7.258064516129033 POD = 100.0 CSI = 92.74193548387096 FOM = 0.0 FOH = 92.74193548387096 PON = 0.0 POFD = 100.0 DFR = infinite FOCN = inifinte TSS = 0.0 Heidke = 0.0 Performance metrics or Skill score for Unhealthy PM10 are: A = 82.66129032258065 FAR = 15.778251599147122 POD = 87.77777777777777 CSI = 75.38167938931298 FOM = 12.222222222222221 FOH = 84.22174840085287 PON = 74.82993197278913 POFD = 25.170068027210885 DFR = 20.0 FOCN = 80.0 TSS = 0.6260770975056689 Heidke = 0.6331697472824841
get_stat_performance(dfjan)
Mean bias : mean bias pm2.5 = -4.9040350582437355 mean bias pm10 = 5.588735439068095 mean bias aqi_pm2.5 = 6.764409876792125 mean bias aqi_pm10 = -0.49719154345878214 fractional bias is : fractional bias pm2.5 = -0.02842237238832705 fractional bias pm10 = 0.019825921180154953 fractional bias aqi_pm2.5 = 0.02040557940649959 fractional bias aqi_pm10 = -0.002051324815328349 Correlation coefficient is : corr coeff pm2.5 = 0.7971946196191955 corr coeff pm10 = 0.7561275801842761 corr coeff aqi_pm2.5 = 0.6427104111875994 corr coeff aqi_pm10 = 0.7754802858789724 RMSE is : RMSE pm2.5 = 43.50325303062424 RMSE pm10 = 67.4936952276184 RMSE aqi_pm2.5 = 61.21820009177507 RMSE aqi_pm10 = 63.27051843878906
# Plot of PM2.5 model and data for Nov, Dec and Jan with errorbars
fig = plt.figure(figsize=(20,15))
plt.subplot(311)
sns.lineplot(data = dfnov, x='datetime', y='mod_pm2', err_style='band', label='model', color='red')
# plt.errorbar(dfmod_nov.index, dfmod_nov['mod_pm2'], yerr=dfmod_nov['mod_pm2_stdev'], alpha=0.15, ecolor='blue', fmt='o', mfc='blue', markersize=8, capsize=10)
plt.fill_between(dfnov.index, dfnov['mod_pm2'] + dfnov['mod_pm2_stdev'], dfnov['mod_pm2'] - dfnov['mod_pm2_stdev'], alpha=0.15, color='red')
sns.lineplot(data = dfnov, x='datetime', y='obs_pm2', err_style='band', label='observed', color='blue')
# plt.errorbar(dfobs_nov.index, dfobs_nov['obs_pm2'], yerr=dfobs_nov['obs_pm2_stdev'], alpha=0.15, ecolor='green', fmt='o', mfc='green', markersize=8, capsize=10)
plt.fill_between(dfnov.index, dfnov['obs_pm2'] + dfnov['obs_pm2_stdev'], dfnov['obs_pm2'] - dfnov['obs_pm2_stdev'], alpha=0.15, color='blue')
plt.ylabel('PM2.5 concentration (in $\mu g/m^3$)', fontsize=14)
plt.xlabel('Time', fontsize=14)
plt.legend(loc=1, prop={'size': 15})
plt.title('November 2020', fontsize=18)
plt.subplot(312)
sns.lineplot(data = dfdec, x='datetime', y='mod_pm2', err_style='band', label='model', color='red')
# plt.errorbar(dfdec.index, dfdec['mod_pm2'], yerr=dfdec['mod_pm2_stdev'], alpha=0.15, ecolor='blue', fmt='o', mfc='blue', markersize=8, capsize=10)
plt.fill_between(dfdec.index, dfdec['mod_pm2'] + dfdec['mod_pm2_stdev'], dfdec['mod_pm2'] - dfdec['mod_pm2_stdev'], alpha=0.15, color='red')
sns.lineplot(data = dfdec, x='datetime', y='obs_pm2', err_style='band', label='observed', color='blue')
# plt.errorbar(dfdec.index, dfdec['obs_pm2'], yerr=dfdec['obs_pm2_stdev'], alpha=0.15, ecolor='green', fmt='o', mfc='green', markersize=8, capsize=10)
plt.fill_between(dfdec.index, dfdec['obs_pm2'] + dfdec['obs_pm2_stdev'], dfdec['obs_pm2'] - dfdec['obs_pm2_stdev'], alpha=0.15, color='blue')
plt.ylabel('PM2.5 concentration (in $\mu g/m^3$)', fontsize=14)
plt.xlabel('Time', fontsize=14)
plt.legend(loc=1, prop={'size': 15})
plt.title('December 2020', fontsize=18)
plt.subplot(313)
sns.lineplot(data = dfjan, x='datetime', y='mod_pm2', err_style='band', label='model', color='red')
# plt.errorbar(dfjan.index, dfjan['mod_pm2'], yerr=dfjan['mod_pm2_stdev'], alpha=0.15, ecolor='blue', fmt='o', mfc='blue', markersize=8, capsize=10)
plt.fill_between(dfjan.index, dfjan['mod_pm2'] + dfjan['mod_pm2_stdev'], dfjan['mod_pm2'] - dfjan['mod_pm2_stdev'], alpha=0.15, color='red')
sns.lineplot(data = dfjan, x='datetime', y='obs_pm2', err_style='band', label='observed', color='blue')
# plt.errorbar(dfjan.index, dfjan['obs_pm2'], yerr=dfjan['obs_pm2_stdev'], alpha=0.15, ecolor='green', fmt='o', mfc='green', markersize=8, capsize=10)
plt.fill_between(dfjan.index, dfjan['obs_pm2'] + dfjan['obs_pm2_stdev'], dfjan['obs_pm2'] - dfjan['obs_pm2_stdev'], alpha=0.15, color='blue')
plt.ylabel('PM2.5 concentration (in $\mu g/m^3$)', fontsize=14)
plt.xlabel('Time', fontsize=14)
plt.legend(loc=1, prop={'size': 15})
plt.title('January 2021', fontsize=18)
fig.suptitle('PM2.5 concentration over Delhi', fontsize=24, y=0.99)
fig.tight_layout()
plt.savefig('./images/pm25ts_err_running.png')
# Plot of PM10 model and data for Nov, Dec and Jan with errorbars
fig = plt.figure(figsize=(20,15))
plt.subplot(311)
sns.lineplot(data = dfnov, x='datetime', y='mod_pm10', err_style='band', label='model', color='red')
# plt.errorbar(dfnov.index, dfnov['mod_pm10'], yerr=dfnov['mod_pm10_stdev'], alpha=0.15, ecolor='blue', fmt='o', mfc='blue', markersize=8, capsize=10)
plt.fill_between(dfnov.index, dfnov['mod_pm10'] + dfnov['mod_pm10_stdev'], dfnov['mod_pm10'] - dfnov['mod_pm10_stdev'], alpha=0.15, color='red')
sns.lineplot(data = dfnov, x='datetime', y='obs_pm10', err_style='band', label='observed', color='blue')
# plt.errorbar(dfnov.index, dfnov['obs_pm10'], yerr=dfnov['obs_pm10_stdev'], alpha=0.15, ecolor='green', fmt='o', mfc='green', markersize=8, capsize=10)
plt.fill_between(dfnov.index, dfnov['obs_pm10'] + dfnov['obs_pm10_stdev'], dfnov['obs_pm10'] - dfnov['obs_pm10_stdev'], alpha=0.15, color='blue')
plt.ylabel('PM10 concentration (in $\mu g/m^3$)', fontsize=14)
plt.xlabel('Time', fontsize=14)
plt.legend(loc=1, prop={'size': 15})
plt.title('November 2020', fontsize=18)
plt.subplot(312)
sns.lineplot(data = dfdec, x='datetime', y='mod_pm10', err_style='band', label='model', color='red')
# plt.errorbar(dfnov.index, dfnov['mod_pm10'], yerr=dfnov['mod_pm10_stdev'], alpha=0.15, ecolor='blue', fmt='o', mfc='blue', markersize=8, capsize=10)
plt.fill_between(dfdec.index, dfdec['mod_pm10'] + dfdec['mod_pm10_stdev'], dfdec['mod_pm10'] - dfdec['mod_pm10_stdev'], alpha=0.15, color='red')
sns.lineplot(data = dfdec, x='datetime', y='obs_pm10', err_style='band', label='observed', color='blue')
# plt.errorbar(dfnov.index, dfnov['obs_pm10'], yerr=dfnov['obs_pm10_stdev'], alpha=0.15, ecolor='green', fmt='o', mfc='green', markersize=8, capsize=10)
plt.fill_between(dfdec.index, dfdec['obs_pm10'] + dfdec['obs_pm10_stdev'], dfdec['obs_pm10'] - dfdec['obs_pm10_stdev'], alpha=0.15, color='blue')
plt.ylabel('PM10 concentration (in $\mu g/m^3$)', fontsize=14)
plt.xlabel('Time', fontsize=14)
plt.legend(loc=1, prop={'size': 15})
plt.title('December 2020', fontsize=18)
plt.subplot(313)
sns.lineplot(data = dfjan, x='datetime', y='mod_pm10', err_style='band', label='model', color='red')
# plt.errorbar(dfnov.index, dfnov['mod_pm10'], yerr=dfnov['mod_pm10_stdev'], alpha=0.15, ecolor='blue', fmt='o', mfc='blue', markersize=8, capsize=10)
plt.fill_between(dfjan.index, dfjan['mod_pm10'] + dfjan['mod_pm10_stdev'], dfjan['mod_pm10'] - dfjan['mod_pm10_stdev'], alpha=0.15, color='red')
sns.lineplot(data = dfjan, x='datetime', y='obs_pm10', err_style='band', label='observed', color='blue')
# plt.errorbar(dfnov.index, dfnov['obs_pm10'], yerr=dfnov['obs_pm10_stdev'], alpha=0.15, ecolor='green', fmt='o', mfc='green', markersize=8, capsize=10)
plt.fill_between(dfjan.index, dfjan['obs_pm10'] + dfjan['obs_pm10_stdev'], dfjan['obs_pm10'] - dfjan['obs_pm10_stdev'], alpha=0.15, color='blue')
plt.ylabel('PM10 concentration (in $\mu g/m^3$)', fontsize=14)
plt.xlabel('Time', fontsize=14)
plt.legend(loc=1, prop={'size': 15})
plt.title('January 2021', fontsize=18)
fig.suptitle('PM10 concentration over Delhi', fontsize=24, y=0.99)
fig.tight_layout()
plt.savefig('./images/pm10ts_err_running.png')
# Plot of AQI_PM2.5 model and data Time series for Nov, Dec and Jan
fig = plt.figure(figsize=(20,15))
plt.subplot(311)
sns.lineplot(data = dfnov, x='datetime', y='mod_aqi_pm2', err_style='band', label='model', color='red')
sns.lineplot(data = dfnov, x='datetime', y='obs_aqi_pm2', err_style='band', label='observed', color='blue')
plt.ylabel('AQI for PM2.5', fontsize=14)
plt.xlabel('Time', fontsize=14)
plt.legend(loc=1, prop={'size': 15})
plt.title('November 2020', fontsize=18)
plt.subplot(312)
sns.lineplot(data = dfdec, x='datetime', y='mod_aqi_pm2', err_style='band', label='model', color='red')
sns.lineplot(data = dfdec, x='datetime', y='obs_aqi_pm2', err_style='band', label='observed', color='blue')
plt.ylabel('AQI for PM2.5', fontsize=14)
plt.xlabel('Time', fontsize=14)
plt.legend(loc=1, prop={'size': 15})
plt.title('December 2020', fontsize=18)
plt.subplot(313)
sns.lineplot(data = dfjan, x='datetime', y='mod_aqi_pm2', err_style='band', label='model', color='red')
sns.lineplot(data = dfjan, x='datetime', y='obs_aqi_pm2', err_style='band', label='observed', color='blue')
plt.ylabel('AQI for PM2.5', fontsize=14)
plt.xlabel('Time', fontsize=14)
plt.legend(loc=1, prop={'size': 15})
plt.title('January 2021', fontsize=18)
fig.suptitle('AQI for PM2.5 over Delhi', fontsize=24, y=0.99)
fig.tight_layout()
plt.savefig('./images/aqi_pm25_ts.png')
# Plot of AQI_PM10 model and data Time series for Nov, Dec and Jan
fig = plt.figure(figsize=(20,15))
plt.subplot(311)
sns.lineplot(data = dfnov, x='datetime', y='mod_aqi_pm10', err_style='band', label='model', color='red')
sns.lineplot(data = dfnov, x='datetime', y='obs_aqi_pm10', err_style='band', label='observed', color='blue')
plt.ylabel('AQI for PM10', fontsize=14)
plt.xlabel('Time', fontsize=14)
plt.legend(loc=1, prop={'size': 15})
plt.title('November 2020', fontsize=18)
plt.subplot(312)
sns.lineplot(data = dfdec, x='datetime', y='mod_aqi_pm10', err_style='band', label='model', color='red')
sns.lineplot(data = dfdec, x='datetime', y='obs_aqi_pm10', err_style='band', label='observed', color='blue')
plt.ylabel('AQI for PM10', fontsize=14)
plt.xlabel('Time', fontsize=14)
plt.legend(loc=1, prop={'size': 15})
plt.title('December 2020', fontsize=18)
plt.subplot(313)
sns.lineplot(data = dfjan, x='datetime', y='mod_aqi_pm10', err_style='band', label='model', color='red')
sns.lineplot(data = dfjan, x='datetime', y='obs_aqi_pm10', err_style='band', label='observed', color='blue')
plt.ylabel('AQI for PM10', fontsize=14)
plt.xlabel('Time', fontsize=14)
plt.legend(loc=1, prop={'size': 15})
plt.title('January 2021', fontsize=18)
fig.suptitle('AQI for PM10 over Delhi', fontsize=24, y=0.99)
fig.tight_layout()
plt.savefig('./images/aqi_pm10_ts.png')